###############################################                                     
# Cause or Effect? Turnout in Hispanic Majority-Minority Districts
#  - John A. Henderson, Jasjeet S. Sekhon, and Rocio Titiunik
#  - Forthcoming in Political Analysis
#  - Replication file for <Table XI>
#  - April 14, 2016
###############################################


############################################################################
#
## GENERATES TABLE XI: PLACEBO RESULTS FOR 2000 Open Seat Results
#
###########################################################################
        
# Session setup.
# Use a wide console so the result tables print without wrapping.
options(width = 150)
# Start from an empty workspace: every object in the calling environment is removed.
rm(list = ls())

# Root directory of the replication archive; all file paths below are built from it.
path <- "/local/"
# Run the shared header script.
# NOTE(review): presumably this attaches the packages used later (e.g. the one
# providing ks.boot) -- confirm against headers.R.
source(paste0(path, "replicationPA/funs/headers.R"))

# Fix the random-number seed so any randomised step below is reproducible.
set.seed(57459)

# LOAD BASELINE AND MATCHED DATA
            
#~/ethnicity/open_seats/output/RData.WH-data-open_seats
#~/ethnicity/open_seats/output/final_runs/Matched-open_seats-trimmed.csv

#load(paste(path,"RData.WH-data-open_seats",sep='')); WH2data <- data    
# Baseline sample: the .Rdata file is expected to define `WHdata`, which is
# copied to `WH2data` (the name used throughout the rest of the script).
load(paste0(path, "replicationPA/data/open_data/OpenBaseline-aftermatching.Rdata"))
WH2data <- WHdata

# Matched sample, read from CSV with character columns left as strings.
Sdata <- read.csv(
  file = paste0(path, "replicationPA/data/open_data/Matched-open_seats-trimmed.csv"),
  header = TRUE,
  stringsAsFactors = FALSE
)

# Collapse the detailed income and foreign-born categories of the baseline
# sample into broader groups by summing their component columns.
WH2data$phh_income99_39less <- WH2data$phh_income99_0to19 + WH2data$phh_income99_20to39
WH2data$phh_income99_40to74 <- WH2data$phh_income99_40to59 + WH2data$phh_income99_60to74
WH2data$phh_income99_100plus <- WH2data$phh_income99_100to199 + WH2data$phh_income99_200
WH2data$ppop_foreign <- WH2data$ppop_foreign_naturcit + WH2data$ppop_foreign_nocit

# 1998 non-Hispanic registration (total minus Hispanic) and its share of total
# registration, for the baseline sample ...
WH2data$y98_reg_nhisp <- WH2data$y98_reg_tot - WH2data$y98_reg_hisp
WH2data$y98_preg_nhisp <- WH2data$y98_reg_nhisp / WH2data$y98_reg_tot

# ... and for the matched sample.
Sdata$y98_reg_nhisp <- Sdata$y98_reg_tot - Sdata$y98_reg_hisp
Sdata$y98_preg_nhisp <- Sdata$y98_reg_nhisp / Sdata$y98_reg_tot


# CONSTRUCT NHISP 2000 OUTCOME FOR WH2DATA    

#WH2data$y04_reg_nhisp-WH2data$y04_reg_tot-WH2data$y04_reg_hisp
#WH2data$y04_turn_nhisp=WH2data$y04_turn_tot-WH2data$y04_turn_hisp
  
# CONSTRUCT NHISP 2000 OUTCOME FOR SDATA

#Sdata$y04_reg_nhisp=Sdata$y04_reg_tot-Sdata$y04_reg_hisp
#Sdata$y04_turn_nhisp=Sdata$y04_turn_tot-Sdata$y04_turn_hisp


# Conditioning/Balance set: group most important variables first
# HERE
# Core conditioning/balance covariates from the matched sample. The columns are
# passed as `Sdata$...` expressions, so data.frame() names them after those
# expressions.
imp.vars <- data.frame(
  Sdata$vap,
  Sdata$ppop_black18,
  Sdata$ppop_hispanic18,
  Sdata$phh_income99_39less,
  Sdata$phh_income99_40to74,
  Sdata$phh_income99_100plus,
  Sdata$ppop_25_hsless,
  Sdata$ppop_foreign,
  Sdata$ppop_foreign_naturcit,
  Sdata$ppop_foreign_nocit
)

# Registration covariates: 1998 only.
reg.vars <- data.frame(
  Sdata$y98_preg_tot,
  Sdata$y98_preg_hisp, # no 1998 turnout data, so this is the closest proxy for a lagged outcome
  Sdata$y98_preg_dem,
  Sdata$y98_preg_rep
)
dim(reg.vars)
cat("Final dimension of reg.vars: ", dim(reg.vars), "\n")

# Vote-share covariates (statewide and local offices, 1998).
vote.vars <- data.frame(
  Sdata$y98_pvote_ussdem,
  Sdata$y98_pvote_govdem,
  Sdata$y98_pvote_cngdem,
  Sdata$y98_pvote_assdem,
  Sdata$y98_pvote_atgdem
)

dim(vote.vars)
cat("Final dimension of vote.vars: ", dim(vote.vars), "\n")

# Population-composition covariates.
pop.vars <- data.frame(
  Sdata$ppop_fem,
  Sdata$ppop_25to44,
  Sdata$ppop_45to59,
  Sdata$ppop_70older
)
cat("Final dimension of pop.vars: ", dim(pop.vars), "\n")


# Xall stacks every covariate group; X (and its copy B) holds only the core set.
# Sdata$DisTri_1991 is left out of both because one unique triplet is kept in
# every file.
Xall <- data.frame(imp.vars, reg.vars, vote.vars, pop.vars)
X <- data.frame(imp.vars)
B <- X
dim(X)

    
# WH2data treatment 
# Row positions of treated and control units in the baseline sample (WH2data).
# TRUE/FALSE are spelled out: T and F are ordinary variables that other code
# (e.g. a sourced file) can rebind, which would silently corrupt the split.
# which() drops rows whose treatment indicator is NA.
Tr <- WH2data$Tr
m1 <- list(
  index.treated = which(Tr == TRUE),
  index.control = which(Tr == FALSE)
)
print(table(Tr))

# Same split for the matched sample (Sdata). `Tr` is left holding Sdata$Tr.
Tr <- Sdata$Tr
m2 <- list(
  index.treated = which(Tr == TRUE),
  index.control = which(Tr == FALSE)
)
print(table(Tr))

        
######################################################
### Results matrix
######################################################

# Column labels: group means, difference-in-means p-value, KS-test p-value.
PLACcolnm <- c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")

# Row labels: four outcomes for the baseline sample (suffix WH2) followed by
# the same four outcomes for the matched sample (suffix AM).
PLACrownm <- c(
  "Hispanic Turnout 2000 WH2",
  "Hispanic Registration 2000 WH2",
  "NonHispanic Turnout 2000 WH2",
  "NonHispanic Registration 2000 WH2",
  "Hispanic Turnout 2000 AM",
  "Hispanic Registration 2000 AM",
  "NonHispanic Turnout 2000 AM",
  "NonHispanic Registration 2000 AM"
)

# Results matrix, initialised to NA; each placebo section fills in two rows.
RESPLAC <- matrix(NA, length(PLACrownm), length(PLACcolnm))
dimnames(RESPLAC) <- list(PLACrownm, PLACcolnm)


######################################################
### Placebo 2000 results start here: using SAME matched dataset as in placebos in file GENERATE-TABLE-WH-PLACEBO-outcomes1.R
######################################################

################################################################################
### Placebo results (i): Hispanic Registration in 2000 (as share of Hispanic VAP)
################################################################################

#BASELINE   
    
# ---- Baseline sample: 2000 Hispanic registration / Hispanic VAP ----
Y <- WH2data$y00_reg_hisp / WH2data$hvap
# Set to NA any ratio whose numerator or denominator equals 999999 (apparently a
# missing-data code) or whose denominator is zero.
Y <- replace(
  Y,
  which(WH2data$y00_reg_hisp == 999999 | WH2data$hvap == 0 | WH2data$hvap == 999999),
  NA
)
# Shares above one are discarded as well.
Y <- replace(Y, which(Y > 1), NA)
Tr <- WH2data$Tr == 1

# Reorder to treated-then-control using the baseline index sets.
idx.wh2 <- c(m1$index.treated, m1$index.control)
TrWH2 <- Tr[idx.wh2]
YWH2 <- Y[idx.wh2]

# ---- Matched sample: same outcome, same cleaning rules ----
Y <- Sdata$y00_reg_hisp / Sdata$hvap
Y <- replace(
  Y,
  which(Sdata$hvap == 0 | Sdata$hvap == 999999 | Sdata$y00_reg_hisp == 999999),
  NA
)
Y <- replace(Y, which(Y > 1), NA)

Y1 <- Y[m2$index.treated]
Y0 <- Y[m2$index.control]

# Stack treated outcomes on top of control outcomes, with a matching indicator.
Ym <- c(Y1, Y0)
Trm <- rep(c(TRUE, FALSE), times = c(length(Y1), length(Y0)))

# ---- Naive comparison on the baseline sample ----
cat("Top of results for WH2data \n")
tt.wh2 <- t.test(YWH2[TrWH2], YWH2[!TrWH2])
cat("T-test:\n")
print(tt.wh2)
ks.wh2 <- ks.boot(YWH2[TrWH2], YWH2[!TrWH2])
cat("KS Boot:\n")
print(ks.wh2)

# Store treated mean, control mean, t-test p-value and KS p-value in one step.
RESPLAC[
  "Hispanic Registration 2000 WH2",
  c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")
] <- c(tt.wh2$estimate[1], tt.wh2$estimate[2], tt.wh2$p.value, ks.wh2$ks.boot.pvalue)

# ---- Same comparison after matching (stacked matched data) ----
cat("Top of results AFTER matching \n")
tt.am <- t.test(Ym[Trm], Ym[!Trm])
cat("T-test:\n")
print(tt.am)
ks.am <- ks.boot(Ym[Trm], Ym[!Trm])
cat("KS Boot:\n")
print(ks.am)

RESPLAC[
  "Hispanic Registration 2000 AM",
  c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")
] <- c(tt.am$estimate[1], tt.am$estimate[2], tt.am$p.value, ks.am$ks.boot.pvalue)


################################################################################
### Placebo results (ii): Hispanic Turnout in 2000 (as share of Hispanic Registration in 2000) 
################################################################################

#BASELINE


# ---- Baseline sample: 2000 Hispanic turnout / 2000 Hispanic registration ----
Y <- WH2data$y00_turn_hisp / WH2data$y00_reg_hisp
# Set to NA any ratio whose numerator or denominator equals 999999 (apparently a
# missing-data code) or whose denominator is zero. No upper cap is applied to
# this ratio.
Y <- replace(
  Y,
  which(WH2data$y00_turn_hisp == 999999 | WH2data$y00_reg_hisp == 0 | WH2data$y00_reg_hisp == 999999),
  NA
)
Tr <- WH2data$Tr == 1

# Reorder to treated-then-control using the baseline index sets.
idx.wh2 <- c(m1$index.treated, m1$index.control)
TrWH2 <- Tr[idx.wh2]
YWH2 <- Y[idx.wh2]

# ---- Matched sample: same outcome, same cleaning rules ----
Y <- Sdata$y00_turn_hisp / Sdata$y00_reg_hisp
Y <- replace(
  Y,
  which(Sdata$y00_turn_hisp == 999999 | Sdata$y00_reg_hisp == 0 | Sdata$y00_reg_hisp == 999999),
  NA
)

Y1 <- Y[m2$index.treated]
Y0 <- Y[m2$index.control]

# Stack treated outcomes on top of control outcomes, with a matching indicator.
Ym <- c(Y1, Y0)
Trm <- rep(c(TRUE, FALSE), times = c(length(Y1), length(Y0)))

# ---- Naive comparison on the baseline sample ----
cat("Top of results for WH2data \n")
tt.wh2 <- t.test(YWH2[TrWH2], YWH2[!TrWH2])
cat("T-test:\n")
print(tt.wh2)
ks.wh2 <- ks.boot(YWH2[TrWH2], YWH2[!TrWH2])
cat("KS Boot:\n")
print(ks.wh2)

# Store treated mean, control mean, t-test p-value and KS p-value in one step.
RESPLAC[
  "Hispanic Turnout 2000 WH2",
  c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")
] <- c(tt.wh2$estimate[1], tt.wh2$estimate[2], tt.wh2$p.value, ks.wh2$ks.boot.pvalue)

# ---- Same comparison after matching (stacked matched data) ----
cat("Top of results AFTER matching \n")
tt.am <- t.test(Ym[Trm], Ym[!Trm])
cat("T-test:\n")
print(tt.am)
ks.am <- ks.boot(Ym[Trm], Ym[!Trm])
cat("KS Boot:\n")
print(ks.am)

RESPLAC[
  "Hispanic Turnout 2000 AM",
  c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")
] <- c(tt.am$estimate[1], tt.am$estimate[2], tt.am$p.value, ks.am$ks.boot.pvalue)

#cat("Hodges-Lehmann\n")
#cat("Using option exact=FALSE\n")
#a <- wilcox.exact(Ym[Trm], Ym[!Trm],exact=FALSE, paired=TRUE, conf.int=TRUE, conf.level=0.95)
#cat("H-L estimate:",a$estimate,"\n")
#cat("H-L CI:",a$conf.int,"\n")
#cat("H-L p.value:",a$p.value,"\n")

################################################################################
### Placebo results (iii): NonHispanic Turnout in 2000 (as share of NonHispanic Registration in 2000) 
################################################################################

#BASELINE


# ---- Baseline sample: 2000 non-Hispanic turnout / 2000 non-Hispanic registration ----
Y <- WH2data$y00_turn_nhisp / WH2data$y00_reg_nhisp
# Set to NA any ratio whose numerator or denominator equals 999999 (apparently a
# missing-data code) or whose denominator is zero. No upper cap is applied to
# this ratio.
Y <- replace(
  Y,
  which(WH2data$y00_turn_nhisp == 999999 | WH2data$y00_reg_nhisp == 0 | WH2data$y00_reg_nhisp == 999999),
  NA
)
Tr <- WH2data$Tr == 1

# Reorder to treated-then-control using the baseline index sets.
idx.wh2 <- c(m1$index.treated, m1$index.control)
TrWH2 <- Tr[idx.wh2]
YWH2 <- Y[idx.wh2]

# ---- Matched sample: same outcome, same cleaning rules ----
Y <- Sdata$y00_turn_nhisp / Sdata$y00_reg_nhisp
Y <- replace(
  Y,
  which(Sdata$y00_reg_nhisp == 0 | Sdata$y00_reg_nhisp == 999999 | Sdata$y00_turn_nhisp == 999999),
  NA
)

Y1 <- Y[m2$index.treated]
Y0 <- Y[m2$index.control]

# Stack treated outcomes on top of control outcomes, with a matching indicator.
Ym <- c(Y1, Y0)
Trm <- rep(c(TRUE, FALSE), times = c(length(Y1), length(Y0)))

# ---- Naive comparison on the baseline sample ----
cat("Top of results for WH2data \n")
tt.wh2 <- t.test(YWH2[TrWH2], YWH2[!TrWH2])
cat("T-test:\n")
print(tt.wh2)
ks.wh2 <- ks.boot(YWH2[TrWH2], YWH2[!TrWH2])
cat("KS Boot:\n")
print(ks.wh2)

# Store treated mean, control mean, t-test p-value and KS p-value in one step.
RESPLAC[
  "NonHispanic Turnout 2000 WH2",
  c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")
] <- c(tt.wh2$estimate[1], tt.wh2$estimate[2], tt.wh2$p.value, ks.wh2$ks.boot.pvalue)

# ---- Same comparison after matching (stacked matched data) ----
cat("Top of results AFTER matching \n")
tt.am <- t.test(Ym[Trm], Ym[!Trm])
cat("T-test:\n")
print(tt.am)
ks.am <- ks.boot(Ym[Trm], Ym[!Trm])
cat("KS Boot:\n")
print(ks.am)

RESPLAC[
  "NonHispanic Turnout 2000 AM",
  c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")
] <- c(tt.am$estimate[1], tt.am$estimate[2], tt.am$p.value, ks.am$ks.boot.pvalue)



################################################################################
### Placebo results (iv): NonHispanic Registration in 2000 (as share of NonHispanic VAP) 
################################################################################

#BASELINE


# ---- Baseline sample: 2000 non-Hispanic registration / non-Hispanic VAP ----
Y <- WH2data$y00_reg_nhisp / WH2data$nhvap
# Set to NA any ratio whose numerator or denominator equals 999999 (apparently a
# missing-data code) or whose denominator is zero.
Y <- replace(
  Y,
  which(WH2data$y00_reg_nhisp == 999999 | WH2data$nhvap == 0 | WH2data$nhvap == 999999),
  NA
)
# Shares above one are discarded as well.
Y <- replace(Y, which(Y > 1), NA)
Tr <- WH2data$Tr == 1

# Reorder to treated-then-control using the baseline index sets.
idx.wh2 <- c(m1$index.treated, m1$index.control)
TrWH2 <- Tr[idx.wh2]
YWH2 <- Y[idx.wh2]

# ---- Matched sample: same outcome, same cleaning rules ----
Y <- Sdata$y00_reg_nhisp / Sdata$nhvap
Y <- replace(
  Y,
  which(Sdata$nhvap == 0 | Sdata$nhvap == 999999 | Sdata$y00_reg_nhisp == 999999),
  NA
)
Y <- replace(Y, which(Y > 1), NA)

Y1 <- Y[m2$index.treated]
Y0 <- Y[m2$index.control]

# Stack treated outcomes on top of control outcomes, with a matching indicator.
Ym <- c(Y1, Y0)
Trm <- rep(c(TRUE, FALSE), times = c(length(Y1), length(Y0)))

# ---- Naive comparison on the baseline sample ----
cat("Top of results for WH2data \n")
tt.wh2 <- t.test(YWH2[TrWH2], YWH2[!TrWH2])
cat("T-test:\n")
print(tt.wh2)
ks.wh2 <- ks.boot(YWH2[TrWH2], YWH2[!TrWH2])
cat("KS Boot:\n")
print(ks.wh2)

# Store treated mean, control mean, t-test p-value and KS p-value in one step.
RESPLAC[
  "NonHispanic Registration 2000 WH2",
  c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")
] <- c(tt.wh2$estimate[1], tt.wh2$estimate[2], tt.wh2$p.value, ks.wh2$ks.boot.pvalue)

# ---- Same comparison after matching (stacked matched data) ----
cat("Top of results AFTER matching \n")
tt.am <- t.test(Ym[Trm], Ym[!Trm])
cat("T-test:\n")
print(tt.am)
ks.am <- ks.boot(Ym[Trm], Ym[!Trm])
cat("KS Boot:\n")
print(ks.am)

RESPLAC[
  "NonHispanic Registration 2000 AM",
  c("Mean Tr", "Mean Co", "Diff means p-val", "KS test p-val")
] <- c(tt.am$estimate[1], tt.am$estimate[2], tt.am$p.value, ks.am$ks.boot.pvalue)

#cat("Hodges-Lehmann\n")
#cat("Using option exact=FALSE\n")
#a <- wilcox.exact(Ym[Trm], Ym[!Trm],exact=FALSE, paired=TRUE, conf.int=TRUE, conf.level=0.95)
#cat("H-L estimate:",a$estimate,"\n")
#cat("H-L CI:",a$conf.int,"\n")
#cat("H-L p.value:",a$p.value,"\n")



# Show the full placebo results matrix.
print(RESPLAC)

# End of the estimation part; the remainder formats the results as LaTeX.
#####
####################### LATEX TABLES ##################
library(xtable)
library(stringr)

# Rows 1-4 of RESPLAC hold the baseline (WH2) results, rows 5-8 the matched
# (AM) results, in the same outcome order.
baseline <- RESPLAC[1:4, ]
matched <- RESPLAC[5:8, ]

# Drop the sample suffix from the row labels so both tables share the same names.
rownames(matched) <- rownames(baseline) <- c('Hispanic Turnout','Hispanic Registration','Non Hispanic Turnout','Non Hispanic Registration')

# The xtable objects are printed explicitly: a bare top-level expression is
# only auto-printed in interactive/Rscript runs, so under source() (default
# echo = FALSE) the LaTeX code would otherwise never be emitted.
# digits: first entry is for the row-name column, then one per data column.
print('--------------------')
print('Baseline')
print('--------------------')
print(xtable(baseline, digits = c(1, 3, 3, 2, 2)))

print('--------------------')
print('Matched')
print('--------------------')
print(xtable(matched, digits = c(1, 3, 3, 2, 2)))
                                              
################## LATEX TABLES ##################